Required Libraries
library(readxl)
library(tidyverse)
library(ggplot2)
library(patchwork)
library(fpp2)
library(caret)
library(RANN)
library(VIM)
library(ggpubr)
library(gridExtra)
library(forecast)
De-identified data was provided in order to conduct a series of six forecasts of different variables within the data set. There are two major requirements for this project:
# Load the raw de-identified data set from the Excel workbook.
df <- read_excel("data.xls")
head(df)
#> # A tibble: 6 x 7
#> SeriesInd category Var01 Var02 Var03 Var05 Var07
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 40669 S03 30.6 123432400 30.3 30.5 30.6
#> 2 40669 S02 10.3 60855800 10.0 10.2 10.3
#> 3 40669 S01 26.6 10369300 25.9 26.2 26.0
#> 4 40669 S06 27.5 39335700 26.8 27.0 27.3
#> 5 40669 S05 69.3 27809100 68.2 68.7 69.2
#> 6 40669 S04 17.2 16587400 16.9 16.9 17.1
# Factoring category to get a count of the elements within dataset:
# summary() then reports per-level counts (1762 rows per group).
df$category <- as.factor(df$category)
summary(df)
#> SeriesInd category Var01 Var02
#> Min. :40669 S01:1762 Min. : 9.03 Min. : 1339900
#> 1st Qu.:41303 S02:1762 1st Qu.: 23.10 1st Qu.: 12520675
#> Median :41946 S03:1762 Median : 38.44 Median : 21086550
#> Mean :41945 S04:1762 Mean : 46.98 Mean : 37035741
#> 3rd Qu.:42587 S05:1762 3rd Qu.: 66.78 3rd Qu.: 42486700
#> Max. :43221 S06:1762 Max. :195.18 Max. :480879500
#> NA's :854 NA's :842
#> Var03 Var05 Var07
#> Min. : 8.82 Min. : 8.99 Min. : 8.92
#> 1st Qu.: 22.59 1st Qu.: 22.91 1st Qu.: 22.88
#> Median : 37.66 Median : 38.05 Median : 38.05
#> Mean : 46.12 Mean : 46.55 Mean : 46.56
#> 3rd Qu.: 65.88 3rd Qu.: 66.38 3rd Qu.: 66.31
#> Max. :189.36 Max. :195.00 Max. :189.72
#> NA's :866 NA's :866 NA's :866
All groups are equal in length
Columns 5-7 (Var03, Var05, Var07) all have the same number of missing values, and very similar quartile and min/max values, comparable to column 3 (Var01). Column 4 (Var02) has values that are significantly larger.
# Structure and dimensions of the raw data, plus a count and a visual
# overview of the missing values.
str(df)
#> tibble [10,572 x 7] (S3: tbl_df/tbl/data.frame)
#> $ SeriesInd: num [1:10572] 40669 40669 40669 40669 40669 ...
#> $ category : Factor w/ 6 levels "S01","S02","S03",..: 3 2 1 6 5 4 3 2 1 6 ...
#> $ Var01 : num [1:10572] 30.6 10.3 26.6 27.5 69.3 ...
#> $ Var02 : num [1:10572] 1.23e+08 6.09e+07 1.04e+07 3.93e+07 2.78e+07 ...
#> $ Var03 : num [1:10572] 30.3 10.1 25.9 26.8 68.2 ...
#> $ Var05 : num [1:10572] 30.5 10.2 26.2 27 68.7 ...
#> $ Var07 : num [1:10572] 30.6 10.3 26 27.3 69.2 ...
dim(df)
#> [1] 10572 7
paste0(sum(is.na(df))," values missing from original set")
#> [1] "4294 values missing from original set"
# Plots of missing values: VIM::aggr shows per-variable frequencies and
# co-occurrence patterns of NAs.
# Fix: spell out TRUE instead of the reassignable shorthand T.
aggr_plot <- VIM::aggr(df, col = c("navyblue", "orange"),
                       numbers = TRUE, sortVars = TRUE,
                       labels = names(df),
                       cex.axis = 0.7, gap = 3,
                       ylab = c("Frequency of Missing Data", "Pattern"))
#>
#> Variables sorted by number of missings:
#> Variable Count
#> Var03 0.08191449
#> Var05 0.08191449
#> Var07 0.08191449
#> Var01 0.08077942
#> Var02 0.07964434
#> SeriesInd 0.00000000
#> category 0.00000000
Creating a shadow matrix of the missing values will indicate whether or not the data is MCAR (missing completely at random). A correlation of 1 means two variables are always missing together, and smaller values give the degree to which their missingness co-occurs. \(^2\)
# Shadow matrix: 0/1 indicator of missingness (1 = NA, 0 = observed), used
# to see how the missing values correlate across variables.
x <- as.data.frame(abs(is.na(df)))
# Keep only variables that actually contain NAs (indicator has variance > 0)
y <- x[vapply(x, sd, numeric(1)) > 0]
# Correlation of NA co-occurrence among the incomplete variables
cor(y)
#> Var01 Var02 Var03 Var05 Var07
#> Var01 1.0000000 0.9923369 0.9924341 0.9924341 0.9924341
#> Var02 0.9923369 1.0000000 0.9848290 0.9848290 0.9848290
#> Var03 0.9924341 0.9848290 1.0000000 1.0000000 1.0000000
#> Var05 0.9924341 0.9848290 1.0000000 1.0000000 1.0000000
#> Var07 0.9924341 0.9848290 1.0000000 1.0000000 1.0000000
Aside from the trivial correlation of each variable with itself, the following variables show perfectly correlated missingness (correlation = 1), meaning their values are always missing together:
Var03 is always missing together with Var05 and Var07
Var05 is always missing together with Var03 and Var07
Var07 is always missing together with Var03 and Var05
Taking these observations into consideration, there appears to be a systematic pattern (bias) in the missing data. Therefore, it seems appropriate to impute the missing values, since there is evidence they are not missing completely at random.
# Impute the missing values with per-column medians via caret::preProcess.
# (medianImpute acts on the numeric columns; SeriesInd and category are
# complete, so they pass through untouched.)
preProcess_NAdata_model <- preProcess(as.data.frame(df), method ="medianImpute")
df <- predict(preProcess_NAdata_model, newdata = df)
paste0(sum(is.na(df))," values missing after imputation")
#> [1] "0 values missing after imputation"
# Post-imputation summary: medians unchanged, quartiles pulled toward them.
summary(df)
#> SeriesInd category Var01 Var02
#> Min. :40669 S01:1762 Min. : 9.03 Min. : 1339900
#> 1st Qu.:41303 S02:1762 1st Qu.: 25.40 1st Qu.: 13073675
#> Median :41946 S03:1762 Median : 38.44 Median : 21086550
#> Mean :41945 S04:1762 Mean : 46.29 Mean : 35765478
#> 3rd Qu.:42587 S05:1762 3rd Qu.: 61.34 3rd Qu.: 39322325
#> Max. :43221 S06:1762 Max. :195.18 Max. :480879500
#> Var03 Var05 Var07
#> Min. : 8.82 Min. : 8.99 Min. : 8.92
#> 1st Qu.: 24.65 1st Qu.: 25.05 1st Qu.: 24.99
#> Median : 37.66 Median : 38.05 Median : 38.05
#> Mean : 45.42 Mean : 45.86 Mean : 45.86
#> 3rd Qu.: 60.30 3rd Qu.: 60.83 3rd Qu.: 60.89
#> Max. :189.36 Max. :195.00 Max. :189.72
# Converting SeriesInd to a calendar date.
# Fix: SeriesInd values (40669..43221) are Excel day serial numbers, not
# Unix seconds. The original as.POSIXct(..., origin = "1970-01-01") mapped
# the whole series into a few minutes of 1970-01-01 (see the df_test check
# further below). Excel serials count days from 1899-12-30, which places
# the data on 2011-05-06 .. 2018-05-01.
df$SeriesInd <- as.integer(df$SeriesInd)
df$SeriesInd <- as.Date(df$SeriesInd, origin = "1899-12-30")
# Renaming SeriesInd to clarify purpose (name kept as "Datetime" because
# later code refers to df$Datetime)
df <- df %>% rename("Datetime" = SeriesInd)
summary(df)
#> Datetime now spans 2011-05-06 .. 2018-05-01 (daily trading-style dates);
#> the Var01-Var07 summaries are unchanged from the post-imputation summary
#> shown above.
# One subset per category, for forecasting later on
s01 <- dplyr::filter(df, category == "S01")
s02 <- dplyr::filter(df, category == "S02")
s03 <- dplyr::filter(df, category == "S03")
s04 <- dplyr::filter(df, category == "S04")
s05 <- dplyr::filter(df, category == "S05")
s06 <- dplyr::filter(df, category == "S06")
The data set does not appear to have any distinguishing labels that would indicate anything about the source or purpose of the data set. Under normal circumstances, context about data and use case is important for forecasting. Context may help identify the kinds of methods and models that would be best suited to produce an accurate result - following the “no free lunch” principle.
Boxplots: Var02 has the most outliers but this is also the column with the largest range in values
# Boxplots of each predictor by category.
# NOTE(review): p1-p5 are reassigned in the next chunk before ever being
# printed, so these particular plots are never displayed.
p1 <- ggplot(df, aes(category, Var01)) + geom_boxplot()
p2 <- ggplot(df, aes(category, Var02)) + geom_boxplot()
p3 <- ggplot(df, aes(category, Var03)) + geom_boxplot()
p4 <- ggplot(df, aes(category, Var05)) + geom_boxplot()
p5 <- ggplot(df, aes(category, Var07)) + geom_boxplot()
A time series contains a list of numbers (the measurements), along with some information about the times at which those numbers were recorded (the index).
Box plots
# Horizontal boxplots of each variable, filled by category, with outliers
# highlighted in red.
p1 <- ggplot(df, aes(Var01, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 3)
p2 <- ggplot(df, aes(Var02, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 3)
p3 <- ggplot(df, aes(Var03, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 3)
p4 <- ggplot(df, aes(Var05, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 3)
p5 <- ggplot(df, aes(Var07, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 3)
p6 <- ggplot(df, aes(Datetime, fill = category)) + geom_boxplot(outlier.color = "red", outlier.size = 10)
# Display as a 2-column, 3-row patchwork grid
(p1 + p2) / (p3 + p4) / (p5 + p6)
All predictors are right skewed so transformations are needed for each column
# Density plots of each predictor by category; alpha blending shows overlap.
p1 <- ggplot(df, aes(Var01, fill = category)) + geom_density(alpha = 0.5)
p2 <- ggplot(df, aes(Var02, fill = category)) + geom_density(alpha = 0.5)
p3 <- ggplot(df, aes(Var03, fill = category)) + geom_density(alpha = 0.5)
p4 <- ggplot(df, aes(Var05, fill = category)) + geom_density(alpha = 0.5)
p5 <- ggplot(df, aes(Var07, fill = category)) + geom_density(alpha = 0.5)
# Two-column patchwork layout
p1 + p2 + p3 + p4 + p5 + plot_layout(ncol = 2)
Var02 is the most skewed
# Quantify the right skew seen in the density plots (moments::skewness;
# values > 0 indicate right skew, Var02 by far the strongest).
library(moments)
skewness(df$Var01)
#> [1] 0.8334501
skewness(df$Var02)
#> [1] 3.202094
skewness(df$Var03)
#> [1] 0.8342408
skewness(df$Var05)
#> [1] 0.8368652
skewness(df$Var07)
#> [1] 0.8338966
Experimenting with 3 different transformations; log, square root, and cube root. The log transformation looks the most normalized
# Compare three candidate normalizing transformations on Var01 via
# histograms: log10, square root, and cube root.
log_var01 <- log10(df$Var01)
sqrt_var01 <- sqrt(df$Var01)
cube_var01 <- df$Var01^(1/3)
hist(df$Var01)
hist(log_var01)
hist(sqrt_var01)
hist(cube_var01)
Transform all predictors using log transformation
# Apply the log10 transformation to all five predictors in one pass,
# keeping the original df intact.
df_transformed <- df %>%
  mutate(across(c(Var01, Var02, Var03, Var05, Var07), log10))
New plots using the transformed dataframe. Var02 is the most normalized, so I will try different transformations on the other columns.
# Density plots of the log-transformed predictors, colored by category.
p1 <- ggplot(df_transformed, aes(Var01, color = category)) + geom_density()
p2 <- ggplot(df_transformed, aes(Var02, color = category)) + geom_density()
p3 <- ggplot(df_transformed, aes(Var03, color = category)) + geom_density()
p4 <- ggplot(df_transformed, aes(Var05, color = category)) + geom_density()
p5 <- ggplot(df_transformed, aes(Var07, color = category)) + geom_density()
# Two-column patchwork layout
p1 + p2 + p3 + p4 + p5 + plot_layout(ncol = 2)
# Rebuild the per-category subsets from the transformed data
# (overwrites s01-s06).
s01 <- dplyr::filter(df_transformed, category == "S01")
s02 <- dplyr::filter(df_transformed, category == "S02")
s03 <- dplyr::filter(df_transformed, category == "S03")
s04 <- dplyr::filter(df_transformed, category == "S04")
s05 <- dplyr::filter(df_transformed, category == "S05")
s06 <- dplyr::filter(df_transformed, category == "S06")
Testing conversion using as.Date so each row is a date with no time.
# Re-read the raw data into a fresh copy to test converting SeriesInd with
# as.Date, so each row carries a calendar date with no time component.
df_test <- read_excel("data.xls")
head(df_test)
#> # A tibble: 6 x 7
#> SeriesInd category Var01 Var02 Var03 Var05 Var07
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 40669 S03 30.6 123432400 30.3 30.5 30.6
#> 2 40669 S02 10.3 60855800 10.0 10.2 10.3
#> 3 40669 S01 26.6 10369300 25.9 26.2 26.0
#> 4 40669 S06 27.5 39335700 26.8 27.0 27.3
#> 5 40669 S05 69.3 27809100 68.2 68.7 69.2
#> 6 40669 S04 17.2 16587400 16.9 16.9 17.1
# The final SeriesInd value (43221) holds only NAs in every variable.
tail(df_test)
#> # A tibble: 6 x 7
#> SeriesInd category Var01 Var02 Var03 Var05 Var07
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 43221 S03 NA NA NA NA NA
#> 2 43221 S02 NA NA NA NA NA
#> 3 43221 S01 NA NA NA NA NA
#> 4 43221 S06 NA NA NA NA NA
#> 5 43221 S05 NA NA NA NA NA
#> 6 43221 S04 NA NA NA NA NA
# Converting SeriesInd to Date. SeriesInd is an Excel day serial number
# (origin 1899-12-30), so this yields dates from 2011-05-06 to 2018-05-01.
df_test$SeriesInd <- as.Date(df_test$SeriesInd, origin = "1899-12-30")
# Renaming SeriesInd to Date to clarify purpose
df_test <- df_test %>% rename("Date" = SeriesInd)
summary(df_test)
#> Date category Var01 Var02
#> Min. :2011-05-06 Length:10572 Min. : 9.03 Min. : 1339900
#> 1st Qu.:2013-01-29 Class :character 1st Qu.: 23.10 1st Qu.: 12520675
#> Median :2014-11-03 Mode :character Median : 38.44 Median : 21086550
#> Mean :2014-11-01 Mean : 46.98 Mean : 37035741
#> 3rd Qu.:2016-08-05 3rd Qu.: 66.78 3rd Qu.: 42486700
#> Max. :2018-05-01 Max. :195.18 Max. :480879500
#> NA's :854 NA's :842
#> Var03 Var05 Var07
#> Min. : 8.82 Min. : 8.99 Min. : 8.92
#> 1st Qu.: 22.59 1st Qu.: 22.91 1st Qu.: 22.88
#> Median : 37.66 Median : 38.05 Median : 38.05
#> Mean : 46.12 Mean : 46.55 Mean : 46.56
#> 3rd Qu.: 65.88 3rd Qu.: 66.38 3rd Qu.: 66.31
#> Max. :189.36 Max. :195.00 Max. :189.72
#> NA's :866 NA's :866 NA's :866
# New imputation on the date-converted copy (same medianImpute recipe as
# used for df above).
preProcess_NAdata_model <- preProcess(as.data.frame(df_test), method ="medianImpute")
df_test <- predict(preProcess_NAdata_model, newdata = df_test)
paste0(sum(is.na(df_test))," values missing after imputation")
#> [1] "0 values missing after imputation"
# New per-category subsets using the date-converted data.
# Fix: these previously filtered `df` (whose date conversion differs from
# the as.Date approach), so the "_2" subsets never actually picked up the
# conversion this section is testing; they now filter df_test as the
# surrounding text intends.
s01_2 <- df_test %>% filter(category == "S01")
s02_2 <- df_test %>% filter(category == "S02")
s03_2 <- df_test %>% filter(category == "S03")
s04_2 <- df_test %>% filter(category == "S04")
s05_2 <- df_test %>% filter(category == "S05")
s06_2 <- df_test %>% filter(category == "S06")
Create a time series for each subset using the dataframe with dates.
# Build one bivariate ts object per category for forecasting.
# NOTE(review): each subset has 1,762 daily observations, but ts() is given
# frequency = 12 with start = c(2011, 5) and end = c(2018, 5), an implied
# length of only 85 points; ts() therefore truncates the data to the first
# 85 rows and relabels them as months. Confirm this monthly framing is
# intended before trusting the downstream decompositions and forecasts.
s01_ts <- ts(s01_2[,c("Var01","Var02")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
s02_ts <- ts(s02_2[,c("Var02","Var03")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
s03_ts <- ts(s03_2[,c("Var05","Var07")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
s04_ts <- ts(s04_2[,c("Var01","Var02")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
s05_ts <- ts(s05_2[,c("Var02","Var03")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
s06_ts <- ts(s06_2[,c("Var05","Var07")], frequency = 12, start = c(2011, 5), end = c(2018, 5))
Autoplots and seasonal plots for S01
# Autoplot, seasonal, subseries, and ACF panels for S01 (Var01, Var02).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s01_ts, facets = 2)) /
(ggseasonplot(s01_ts[,1], main = "Var01") + ggseasonplot(s01_ts[,2], main = "Var02")) /
(ggsubseriesplot(s01_ts[,1]) + ggsubseriesplot(s01_ts[,2])) /
(ggAcf(s01_ts[,1], main = "") + ggAcf(s01_ts[,2], main = ""))
Autoplots and seasonal plots for S02
# Autoplot, seasonal, subseries, and ACF panels for S02 (Var02, Var03).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s02_ts, facets = 2)) /
(ggseasonplot(s02_ts[,1], main = "Var02") + ggseasonplot(s02_ts[,2], main = "Var03")) /
(ggsubseriesplot(s02_ts[,1]) + ggsubseriesplot(s02_ts[,2])) /
(ggAcf(s02_ts[,1], main = "") + ggAcf(s02_ts[,2], main = ""))
Autoplots and seasonal plots for S03
# Autoplot, seasonal, subseries, and ACF panels for S03 (Var05, Var07).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s03_ts, facets = 2)) /
(ggseasonplot(s03_ts[,1], main = "Var05") + ggseasonplot(s03_ts[,2], main = "Var07")) /
(ggsubseriesplot(s03_ts[,1]) + ggsubseriesplot(s03_ts[,2])) /
(ggAcf(s03_ts[,1], main = "") + ggAcf(s03_ts[,2], main = ""))
Autoplots and seasonal plots for S04
# Autoplot, seasonal, subseries, and ACF panels for S04 (Var01, Var02).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s04_ts, facets = 2)) /
(ggseasonplot(s04_ts[,1], main = "Var01") + ggseasonplot(s04_ts[,2], main = "Var02")) /
(ggsubseriesplot(s04_ts[,1]) + ggsubseriesplot(s04_ts[,2])) /
(ggAcf(s04_ts[,1], main = "") + ggAcf(s04_ts[,2], main = ""))
Autoplots and seasonal plots for S05
# Autoplot, seasonal, subseries, and ACF panels for S05 (Var02, Var03).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s05_ts, facets = 2)) /
(ggseasonplot(s05_ts[,1], main = "Var02") + ggseasonplot(s05_ts[,2], main = "Var03")) /
(ggsubseriesplot(s05_ts[,1]) + ggsubseriesplot(s05_ts[,2])) /
(ggAcf(s05_ts[,1], main = "") + ggAcf(s05_ts[,2], main = ""))
Autoplots and seasonal plots for S06
# Autoplot, seasonal, subseries, and ACF panels for S06 (Var05, Var07).
# Fix: the ACF row previously plotted column 2 twice; the first panel now
# shows the ACF of column 1.
(autoplot(s06_ts, facets = 2)) /
(ggseasonplot(s06_ts[,1], main = "Var05") + ggseasonplot(s06_ts[,2], main = "Var07")) /
(ggsubseriesplot(s06_ts[,1]) + ggsubseriesplot(s06_ts[,2])) /
(ggAcf(s06_ts[,1], main = "") + ggAcf(s06_ts[,2], main = ""))
# Lag plots for both variables of every series, arranged in one grid.
# Fix: p3 and p6 previously left the legend visible on their second panel,
# unlike p1/p2/p4/p5 — suppressed here for a consistent layout.
p1 <- gglagplot(s01_ts[,1]) + theme(legend.position = "none") + gglagplot(s01_ts[,2]) + theme(legend.position = "none")
p2 <- gglagplot(s02_ts[,1]) + theme(legend.position = "none") + gglagplot(s02_ts[,2]) + theme(legend.position = "none")
p3 <- gglagplot(s03_ts[,1]) + theme(legend.position = "none") + gglagplot(s03_ts[,2]) + theme(legend.position = "none")
p4 <- gglagplot(s04_ts[,1]) + theme(legend.position = "none") + gglagplot(s04_ts[,2]) + theme(legend.position = "none")
p5 <- gglagplot(s05_ts[,1]) + theme(legend.position = "none") + gglagplot(s05_ts[,2]) + theme(legend.position = "none")
p6 <- gglagplot(s06_ts[,1]) + theme(legend.position = "none") + gglagplot(s06_ts[,2]) + theme(legend.position = "none")
grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 2)
# STL decomposition of S01 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s01_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s01_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S01: Var01")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S01: Var02")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S01: Var01")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S01: Var02")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s01_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s01_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S01: Var01")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S01: Var02")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# STL decomposition of S02 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s02_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s02_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S02: Var02")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S02: Var03")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S02: Var02")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S02: Var03")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s02_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s02_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S02: Var02")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S02: Var03")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# STL decomposition of S03 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s03_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s03_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S03: Var05")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S03: Var07")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S03: Var05")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S03: Var07")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s03_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s03_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S03: Var05")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S03: Var07")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# STL decomposition of S04 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s04_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s04_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S04: Var01")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S04: Var02")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S04: Var01")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S04: Var02")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s04_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s04_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S04: Var01")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S04: Var02")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# STL decomposition of S05 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s05_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s05_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S05: Var02")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S05: Var03")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S05: Var02")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S05: Var03")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s05_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s05_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S05: Var02")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S05: Var03")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# STL decomposition of S06 (default settings, periodic seasonal window)
fit_stl_1 <- stl(s06_ts[,1], s.window = "periodic")
# STL decomposition of the second variable
fit_stl_2 <- stl(s06_ts[,2], s.window = "periodic")
# Naive (random-walk) forecast of the seasonally adjusted data
f1 <- fit_stl_1 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S06: Var05")
# Naive (random-walk) forecast of the seasonally adjusted data
f2 <- fit_stl_2 %>% seasadj() %>% naive()%>%
autoplot() + ylab("S06: Var07")
# Forecast from STL + random walk (reseasonalized naive forecast)
f3 <- fit_stl_1 %>% forecast(method="naive") %>%
autoplot() + ylab("S06: Var05")
# Forecast from STL + random walk (reseasonalized naive forecast)
f4 <- fit_stl_2 %>% forecast(method="naive") %>%
autoplot() + ylab("S06: Var07")
# Holt-Winters fit for the first variable
hw1 <- HoltWinters(s06_ts[,1])
# Holt-Winters fit for the second variable
hw2 <- HoltWinters(s06_ts[,2])
f5 <- hw1 %>% forecast() %>%
autoplot() + ylab("S06: Var05")
f6 <- hw2 %>% forecast() %>%
autoplot() + ylab("S06: Var07")
# Rows: seasonally adjusted naive, STL + naive, Holt-Winters
(f1 + f2) / (f3 + f4) / (f5 + f6)
# Simple exponential smoothing for S01, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var01 & Var02
fit_s01_ses_1 <- ses(s01_ts[,1])
fit_s01_ses_2 <- ses(s01_ts[,2])
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s01_ses_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 11.445, df = 15, p-value = 0.7205
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s01_ses_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 16.925, df = 15, p-value = 0.3234
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s01_ses_1 <- forecast(fit_s01_ses_1)
fc_s01_ses_2 <- forecast(fit_s01_ses_2)
# Plot forecasts with fitted values overlaid
fses_S01_1 <- autoplot(fc_s01_ses_1) + ylab("S01: Var01") +
autolayer(fitted(fc_s01_ses_1))
fses_S01_2 <- autoplot(fc_s01_ses_2) + ylab("S01: Var02") +
autolayer(fitted(fc_s01_ses_2))
(fses_S01_1 + fses_S01_2)
# Simple exponential smoothing for S02, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var02 & Var03
fit_s02_ses_2 <- ses(s02_ts[,1])
fit_s02_ses_3 <- ses(s02_ts[,2])
# Check residuals.
# NOTE: p < 0.05 for Var02 below, so significant residual autocorrelation
# remains for that fit.
checkresiduals(fit_s02_ses_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 34.481, df = 15, p-value = 0.002914
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s02_ses_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 15.73, df = 15, p-value = 0.4002
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s02_ses_2 <- forecast(fit_s02_ses_2)
fc_s02_ses_3 <- forecast(fit_s02_ses_3)
# Plot forecasts with fitted values overlaid
fses_S02_2 <- autoplot(fc_s02_ses_2) + ylab("S02: Var02") +
autolayer(fitted(fc_s02_ses_2))
fses_S02_3 <- autoplot(fc_s02_ses_3) + ylab("S02: Var03") +
autolayer(fitted(fc_s02_ses_3))
(fses_S02_2 + fses_S02_3)
# Simple exponential smoothing for S03, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var05 & Var07
fit_s03_ses_5 <- ses(s03_ts[,1])
fit_s03_ses_7 <- ses(s03_ts[,2])
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s03_ses_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 16.047, df = 15, p-value = 0.3789
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s03_ses_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 17.561, df = 15, p-value = 0.2864
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s03_ses_5 <- forecast(fit_s03_ses_5)
fc_s03_ses_7 <- forecast(fit_s03_ses_7)
# Plot forecasts with fitted values overlaid
fses_S03_5 <- autoplot(fc_s03_ses_5) + ylab("S03: Var05") +
autolayer(fitted(fc_s03_ses_5))
fses_S03_7 <- autoplot(fc_s03_ses_7) + ylab("S03: Var07") +
autolayer(fitted(fc_s03_ses_7))
(fses_S03_5 + fses_S03_7)
# Simple exponential smoothing for S04, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var01 & Var02
fit_s04_ses_1 <- ses(s04_ts[,1])
fit_s04_ses_2 <- ses(s04_ts[,2])
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s04_ses_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 14.47, df = 15, p-value = 0.4902
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s04_ses_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 21.273, df = 15, p-value = 0.1283
#>
#> Model df: 2. Total lags used: 17
# Forecast. (The original comment said "next 140 periods", but no h is
# passed, so forecast() uses its default horizon.)
fc_s04_ses_1 <- forecast(fit_s04_ses_1)
fc_s04_ses_2 <- forecast(fit_s04_ses_2)
# Plot forecasts with fitted values overlaid
fses_s04_1 <- autoplot(fc_s04_ses_1) + ylab("s04: Var01") +
autolayer(fitted(fc_s04_ses_1))
fses_s04_2 <- autoplot(fc_s04_ses_2) + ylab("s04: Var02") +
autolayer(fitted(fc_s04_ses_2))
(fses_s04_1 + fses_s04_2)
# Simple exponential smoothing for S05, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var02 & Var03
fit_s05_ses_2 <- ses(s05_ts[,1])
fit_s05_ses_3 <- ses(s05_ts[,2])
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s05_ses_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 14.529, df = 15, p-value = 0.4859
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s05_ses_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 16.057, df = 15, p-value = 0.3783
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s05_ses_2 <- forecast(fit_s05_ses_2)
fc_s05_ses_3 <- forecast(fit_s05_ses_3)
# Plot forecasts with fitted values overlaid
fses_s05_2 <- autoplot(fc_s05_ses_2) + ylab("s05: Var02") +
autolayer(fitted(fc_s05_ses_2))
fses_s05_3 <- autoplot(fc_s05_ses_3) + ylab("s05: Var03") +
autolayer(fitted(fc_s05_ses_3))
(fses_s05_2 + fses_s05_3)
# Simple exponential smoothing for S06, chosen because there is no clear
# trend or seasonal pattern.
# Fit ses models to Var05 & Var07
fit_s06_ses_5 <- ses(s06_ts[,1])
fit_s06_ses_7 <- ses(s06_ts[,2])
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s06_ses_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 7.0369, df = 15, p-value = 0.9566
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s06_ses_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from Simple exponential smoothing
#> Q* = 6.1976, df = 15, p-value = 0.9762
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s06_ses_5 <- forecast(fit_s06_ses_5)
fc_s06_ses_7 <- forecast(fit_s06_ses_7)
# Plot forecasts with fitted values overlaid
fses_s06_5 <- autoplot(fc_s06_ses_5) + ylab("s06: Var05") +
autolayer(fitted(fc_s06_ses_5))
fses_s06_7 <- autoplot(fc_s06_ses_7) + ylab("s06: Var07") +
autolayer(fitted(fc_s06_ses_7))
(fses_s06_5 + fses_s06_7)
# Fit automatically selected ETS models to Var01 & Var02 of S01.
fit_s01_ETS_1 <- ets(s01_ts[,1])
fit_s01_ETS_2 <- ets(s01_ts[,2])
# Check residuals; ets() selected M,N,N (multiplicative error, no trend,
# no seasonality) for both variables.
checkresiduals(fit_s01_ETS_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 11.083, df = 15, p-value = 0.7467
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s01_ETS_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 20.026, df = 15, p-value = 0.171
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s01_ETS_1 <- forecast(fit_s01_ETS_1)
fc_s01_ETS_2 <- forecast(fit_s01_ETS_2)
# Plot forecasts with fitted values overlaid
fets_S01_1 <- autoplot(fc_s01_ETS_1) + ylab("S01: Var01") +
autolayer(fitted(fc_s01_ETS_1))
fets_S01_2 <- autoplot(fc_s01_ETS_2) + ylab("S01: Var02") +
autolayer(fitted(fc_s01_ETS_2))
(fets_S01_1 + fets_S01_2)
# Fit automatically selected ETS models to Var02 & Var03 of S02.
fit_s02_ETS_2 <- ets(s02_ts[,1])
fit_s02_ETS_3 <- ets(s02_ts[,2])
# Check residuals; ets() selected M,N,N for both variables.
checkresiduals(fit_s02_ETS_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 21.575, df = 15, p-value = 0.1194
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s02_ETS_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 16.302, df = 15, p-value = 0.3623
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s02_ETS_2 <- forecast(fit_s02_ETS_2)
fc_s02_ETS_3 <- forecast(fit_s02_ETS_3)
# Plot forecasts with fitted values overlaid
fets_S02_2 <- autoplot(fc_s02_ETS_2) + ylab("S02: Var02") +
autolayer(fitted(fc_s02_ETS_2))
fets_S02_3 <- autoplot(fc_s02_ETS_3) + ylab("S02: Var03") +
autolayer(fitted(fc_s02_ETS_3))
(fets_S02_2 + fets_S02_3)
# Fit automatically selected ETS models to Var05 & Var07 of S03.
fit_s03_ETS_5 <- ets(s03_ts[,1])
fit_s03_ETS_7 <- ets(s03_ts[,2])
# Check residuals; ets() selected M,N,N for both variables.
checkresiduals(fit_s03_ETS_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 18.497, df = 15, p-value = 0.2375
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s03_ETS_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 19.426, df = 15, p-value = 0.1951
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s03_ETS_5 <- forecast(fit_s03_ETS_5)
fc_s03_ETS_7 <- forecast(fit_s03_ETS_7)
# Plot forecasts with fitted values overlaid
fets_S03_5 <- autoplot(fc_s03_ETS_5) + ylab("S03: Var05") +
autolayer(fitted(fc_s03_ETS_5))
fets_S03_7 <- autoplot(fc_s03_ETS_7) + ylab("S03: Var07") +
autolayer(fitted(fc_s03_ETS_7))
(fets_S03_5 + fets_S03_7)
# Fit automatically selected ETS models to Var01 & Var02 of S04.
fit_s04_ETS_1 <- ets(s04_ts[,1])
fit_s04_ETS_2 <- ets(s04_ts[,2])
# Check residuals; ets() selected M,N,N for both variables.
checkresiduals(fit_s04_ETS_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 13.643, df = 15, p-value = 0.5527
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s04_ETS_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(M,N,N)
#> Q* = 17.795, df = 15, p-value = 0.2736
#>
#> Model df: 2. Total lags used: 17
# Forecast. (The original comment said "next 140 periods", but no h is
# passed, so forecast() uses its default horizon.)
fc_s04_ETS_1 <- forecast(fit_s04_ETS_1)
fc_s04_ETS_2 <- forecast(fit_s04_ETS_2)
# Plot forecasts with fitted values overlaid
fets_s04_1 <- autoplot(fc_s04_ETS_1) + ylab("s04: Var01") +
autolayer(fitted(fc_s04_ETS_1))
fets_s04_2 <- autoplot(fc_s04_ETS_2) + ylab("s04: Var02") +
autolayer(fitted(fc_s04_ETS_2))
(fets_s04_1 + fets_s04_2)
# Fit automatically selected ETS models to Var02 & Var03 of S05.
fit_s05_ETS_2 <- ets(s05_ts[,1])
fit_s05_ETS_3 <- ets(s05_ts[,2])
# Check residuals; here ets() selected A,N,N (additive error), so the fit
# is equivalent to simple exponential smoothing.
checkresiduals(fit_s05_ETS_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(A,N,N)
#> Q* = 14.531, df = 15, p-value = 0.4857
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s05_ETS_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(A,N,N)
#> Q* = 16.057, df = 15, p-value = 0.3782
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s05_ETS_2 <- forecast(fit_s05_ETS_2)
fc_s05_ETS_3 <- forecast(fit_s05_ETS_3)
# Plot forecasts with fitted values overlaid
fets_s05_2 <- autoplot(fc_s05_ETS_2) + ylab("s05: Var02") +
autolayer(fitted(fc_s05_ETS_2))
fets_s05_3 <- autoplot(fc_s05_ETS_3) + ylab("s05: Var03") +
autolayer(fitted(fc_s05_ETS_3))
(fets_s05_2 + fets_s05_3)
# Fit automatically selected ETS models to Var05 & Var07 of S06.
fit_s06_ETS_5 <- ets(s06_ts[,1])
fit_s06_ETS_7 <- ets(s06_ts[,2])
# Check residuals; here ets() selected A,N,N (additive error), so the fit
# is equivalent to simple exponential smoothing.
checkresiduals(fit_s06_ETS_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(A,N,N)
#> Q* = 7.0367, df = 15, p-value = 0.9566
#>
#> Model df: 2. Total lags used: 17
checkresiduals(fit_s06_ETS_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from ETS(A,N,N)
#> Q* = 6.197, df = 15, p-value = 0.9762
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s06_ETS_5 <- forecast(fit_s06_ETS_5)
fc_s06_ETS_7 <- forecast(fit_s06_ETS_7)
# Plot forecasts with fitted values overlaid
fets_s06_5 <- autoplot(fc_s06_ETS_5) + ylab("s06: Var05") +
autolayer(fitted(fc_s06_ETS_5))
fets_s06_7 <- autoplot(fc_s06_ETS_7) + ylab("s06: Var07") +
autolayer(fitted(fc_s06_ETS_7))
(fets_s06_5 + fets_s06_7)
# Fit ARIMA models to Var01 & Var02 of S01 via auto.arima (stepwise search).
fit_s01_ARIMA_1 <- auto.arima(s01_ts[,1], stepwise = TRUE)
fit_s01_ARIMA_2 <- auto.arima(s01_ts[,2], stepwise = TRUE)
# Check residuals (Ljung-Box p > 0.05 -> residuals resemble white noise)
checkresiduals(fit_s01_ARIMA_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,0)
#> Q* = 11.465, df = 17, p-value = 0.8314
#>
#> Model df: 0. Total lags used: 17
checkresiduals(fit_s01_ARIMA_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(1,0,0) with non-zero mean
#> Q* = 13.722, df = 15, p-value = 0.5467
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s01_ARIMA_1 <- forecast(fit_s01_ARIMA_1)
fc_s01_ARIMA_2 <- forecast(fit_s01_ARIMA_2)
# Plot forecasts with fitted values overlaid
fa_S01_1 <- autoplot(fc_s01_ARIMA_1) + ylab("S01: Var01") +
autolayer(fitted(fc_s01_ARIMA_1))
fa_S01_2 <- autoplot(fc_s01_ARIMA_2) + ylab("S01: Var02") +
autolayer(fitted(fc_s01_ARIMA_2))
(fa_S01_1 + fa_S01_2)
# Fit ARIMA models to Var02 & Var03 of S02 via auto.arima (stepwise search).
fit_s02_ARIMA_2 <- auto.arima(s02_ts[,1], stepwise = TRUE)
fit_s02_ARIMA_3 <- auto.arima(s02_ts[,2], stepwise = TRUE)
# Check residuals.
# NOTE: p < 0.05 for Var02 below, so significant residual autocorrelation
# remains for that fit.
checkresiduals(fit_s02_ARIMA_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,0,0) with non-zero mean
#> Q* = 31.775, df = 16, p-value = 0.0107
#>
#> Model df: 1. Total lags used: 17
checkresiduals(fit_s02_ARIMA_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,2)
#> Q* = 10.44, df = 15, p-value = 0.7912
#>
#> Model df: 2. Total lags used: 17
# Forecast (default horizon)
fc_s02_ARIMA_2 <- forecast(fit_s02_ARIMA_2)
fc_s02_ARIMA_3 <- forecast(fit_s02_ARIMA_3)
# Plot forecasts with fitted values overlaid.
# Fix: a stray second ylab("S01: Var01") (copy-paste leftover) was
# overriding the intended axis labels; only the S02 labels are kept.
fa_S02_2 <- autoplot(fc_s02_ARIMA_2) + ylab("S02: Var02") +
autolayer(fitted(fc_s02_ARIMA_2))
fa_S02_3 <- autoplot(fc_s02_ARIMA_3) + ylab("S02: Var03") +
autolayer(fitted(fc_s02_ARIMA_3))
(fa_S02_2 + fa_S02_3)
# Fit ARIMA models to s03's Var05 & Var07 series (stepwise model search)
fit_s03_ARIMA_5 <- auto.arima(s03_ts[,1], stepwise = TRUE)
fit_s03_ARIMA_7 <- auto.arima(s03_ts[,2], stepwise = TRUE)
# check residuals (both pass the Ljung-Box test: p > 0.05)
checkresiduals(fit_s03_ARIMA_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,0)
#> Q* = 16.015, df = 17, p-value = 0.5228
#>
#> Model df: 0. Total lags used: 17
checkresiduals(fit_s03_ARIMA_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,0)
#> Q* = 17.397, df = 17, p-value = 0.4278
#>
#> Model df: 0. Total lags used: 17
# forecast (default horizon)
fc_s03_ARIMA_5 <- forecast(fit_s03_ARIMA_5)
fc_s03_ARIMA_7 <- forecast(fit_s03_ARIMA_7)
# plot forecasts with fitted values overlaid
# (fix: removed stray duplicate ylab("S01: Var01") copied from the S01 block;
# ggplot2 keeps only the last ylab, so it was overriding the intended labels)
fa_S03_5 <- autoplot(fc_s03_ARIMA_5) + ylab("S03: Var05") +
  autolayer(fitted(fc_s03_ARIMA_5))
fa_S03_7 <- autoplot(fc_s03_ARIMA_7) + ylab("S03: Var07") +
  autolayer(fitted(fc_s03_ARIMA_7))
(fa_S03_5 + fa_S03_7)
# Fit ARIMA models to s04's Var01 & Var02 series (stepwise model search)
fit_s04_ARIMA_1 <- auto.arima(s04_ts[,1], stepwise = TRUE)
fit_s04_ARIMA_2 <- auto.arima(s04_ts[,2], stepwise = TRUE)
# check residuals (both pass the Ljung-Box test: p > 0.05)
checkresiduals(fit_s04_ARIMA_1)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,0)
#> Q* = 14.463, df = 17, p-value = 0.6341
#>
#> Model df: 0. Total lags used: 17
checkresiduals(fit_s04_ARIMA_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(1,0,0)(0,0,1)[12] with non-zero mean
#> Q* = 14.687, df = 14, p-value = 0.3999
#>
#> Model df: 3. Total lags used: 17
# forecast (default horizon)
fc_s04_ARIMA_1 <- forecast(fit_s04_ARIMA_1)
fc_s04_ARIMA_2 <- forecast(fit_s04_ARIMA_2)
# plot forecasts with fitted values overlaid
# (fix: removed stray duplicate ylab("S01: Var01") copied from the S01 block;
# ggplot2 keeps only the last ylab, so it was overriding the intended labels)
fa_S04_1 <- autoplot(fc_s04_ARIMA_1) + ylab("S04: Var01") +
  autolayer(fitted(fc_s04_ARIMA_1))
fa_S04_2 <- autoplot(fc_s04_ARIMA_2) + ylab("S04: Var02") +
  autolayer(fitted(fc_s04_ARIMA_2))
(fa_S04_1 + fa_S04_2)
# Fit ARIMA models to s05's Var02 & Var03 series (stepwise model search)
fit_s05_ARIMA_2 <- auto.arima(s05_ts[,1], stepwise = TRUE)
fit_s05_ARIMA_3 <- auto.arima(s05_ts[,2], stepwise = TRUE)
# check residuals (both pass the Ljung-Box test: p > 0.05)
checkresiduals(fit_s05_ARIMA_2)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(2,0,0) with non-zero mean
#> Q* = 12.202, df = 14, p-value = 0.5901
#>
#> Model df: 3. Total lags used: 17
checkresiduals(fit_s05_ARIMA_3)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,2)
#> Q* = 13.089, df = 15, p-value = 0.5954
#>
#> Model df: 2. Total lags used: 17
# forecast (default horizon)
fc_s05_ARIMA_2 <- forecast(fit_s05_ARIMA_2)
fc_s05_ARIMA_3 <- forecast(fit_s05_ARIMA_3)
# plot forecasts with fitted values overlaid
# (fix: removed stray duplicate ylab("S01: Var01") copied from the S01 block;
# ggplot2 keeps only the last ylab, so it was overriding the intended labels)
fa_S05_2 <- autoplot(fc_s05_ARIMA_2) + ylab("S05: Var02") +
  autolayer(fitted(fc_s05_ARIMA_2))
fa_S05_3 <- autoplot(fc_s05_ARIMA_3) + ylab("S05: Var03") +
  autolayer(fitted(fc_s05_ARIMA_3))
(fa_S05_2 + fa_S05_3)
# Fit ARIMA models to s06's Var05 & Var07 series (stepwise model search)
fit_s06_ARIMA_5 <- auto.arima(s06_ts[,1], stepwise = TRUE)
fit_s06_ARIMA_7 <- auto.arima(s06_ts[,2], stepwise = TRUE)
# check residuals (both pass the Ljung-Box test: p > 0.05)
checkresiduals(fit_s06_ARIMA_5)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,1)
#> Q* = 6.5126, df = 16, p-value = 0.9816
#>
#> Model df: 1. Total lags used: 17
checkresiduals(fit_s06_ARIMA_7)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,1)
#> Q* = 6.0207, df = 16, p-value = 0.9879
#>
#> Model df: 1. Total lags used: 17
# forecast (default horizon)
fc_s06_ARIMA_5 <- forecast(fit_s06_ARIMA_5)
fc_s06_ARIMA_7 <- forecast(fit_s06_ARIMA_7)
# plot forecasts with fitted values overlaid
# (fix: removed stray duplicate ylab("S01: Var01") copied from the S01 block;
# ggplot2 keeps only the last ylab, so it was overriding the intended labels)
fa_S06_5 <- autoplot(fc_s06_ARIMA_5) + ylab("S06: Var05") +
  autolayer(fitted(fc_s06_ARIMA_5))
fa_S06_7 <- autoplot(fc_s06_ARIMA_7) + ylab("S06: Var07") +
  autolayer(fitted(fc_s06_ARIMA_7))
(fa_S06_5 + fa_S06_7)